#######################################################
####                                               ####
####  Input: MP-by-debate data for analysis        ####
####  Output: Main paper analysis (Figures 1-6)    ####
####                                               ####
#######################################################

# Load libraries

library(data.table) # v1.14.8
library(plyr) # v1.8.9
library(ggplot2) # v3.4.3
library(sandwich) # v3.0-2
library(dplyr) # v1.1.3
library(broom) # v1.0.5
library(plm) # v2.6-3
library(margins) # v0.3.26
library(estimatr) # v1.0.0
library(clubSandwich) # v0.5.10

# Load data

load("working/speech_scores.Rdata")

# Gender gaps and seniority --------------------------------------------------------------------------------------------------------------

plot_seniority <- function(y = speech_scores$children_std, ...) {
  speech_scores$y_tmp <- y
  
  # Estimate model
  tmp_mod <-
    lm(
      y_tmp ~ gender * as.factor(round(years_in_parliament)) + age_years + party + attends_cabinet +
        attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
        is_committee_chair + marginality + occupation_class,
      data = speech_scores
    )
  
  # Extract coefficients
  coefs <- coef(summary(tmp_mod))
  
  # Extract effect of gender for year 0
  baseline <- coefs[2, 1]
  baseline_se <- coefs[2, 2]
  baseline_hi <- coefs[2, 1] + 1.96 * coefs[2, 2]
  baseline_lo <- coefs[2, 1] - 1.96 * coefs[2, 2]
  
  # Extract interaction terms
  interaction_terms <- grep("\\:", rownames(coefs))
  
  # Calculate marginal effects
  marginal_effects <- baseline + coefs[interaction_terms, 1]
  
  # Cluster standard errors by person
  interaction_vcov <- vcovCL(tmp_mod, ~ person_id)
  
  # Calculate standard errors for sums of coefficients
  marginal_effect_ses <-
    sapply(1:length(interaction_terms), function(x)
      sqrt(
        interaction_vcov[2, 2] + interaction_vcov[interaction_terms[x], interaction_terms[x]] + 
          (2*interaction_vcov[2, interaction_terms[x]])
      ))
  
  # Upper and lower confidence intervals
  marginal_effects_hi <-
    marginal_effects + 1.96 * marginal_effect_ses
  marginal_effects_lo <-
    marginal_effects - 1.96 * marginal_effect_ses
  
  # Combine all into a data frame
  out <- data.frame(
    years = 0:length(marginal_effects),
    est = c(baseline, marginal_effects),
    hi = c(baseline_hi, marginal_effects_hi),
    lo = c(baseline_lo, marginal_effects_lo),
    se = c(baseline_se, marginal_effect_ses),
    row.names = NULL
  )
  out$sig <- ifelse(abs(out$est / out$se) >= 1.96, TRUE, FALSE)
  
  # Combine data frame into plot function
  plot(
    out$years,
    out$est,
    ylim = c(-0.5, 0.5),
    xlim = c(0, 20),
    xlab = "Years in Parliament",
    pch = 20,
    cex.lab = 0.8,
    ylab = "Woman MP difference",
    axes = FALSE,
    col = ifelse(out$sig, "black", alpha("black", .2))
  )
  segments(
    x0 = out$years,
    y0 = out$lo,
    y1 = out$hi,
    col = ifelse(out$sig, "black", alpha("black", .2)),
    lwd = 2
  )
  abline(h = 0, lty = "dashed")
  axis(
    1,
    at = 0:20,
    las = 2,
    cex.axis = 1,
    line = 1
  )
  axis(2,
       at = seq(from = -0.5, to = 0.5, by = 0.25),
       cex.axis = 1)
  title( ... , line = -0.7)
  
}

pdf("analysis/plots/figure_1.pdf", 11.5, 7.5)
par(mfrow = c(3, 4), mai = c(0.35, 0.3, 0.1, 0.2))
plot_seniority(speech_scores$children_std, "Children & Family")
plot_seniority(speech_scores$health_std, "Health")
plot_seniority(speech_scores$education_std, "Education")
plot_seniority(speech_scores$social_std, "Welfare")
plot_seniority(speech_scores$crime_std, "Crime & Policing")
plot_seniority(speech_scores$environment_std, "Environment")
plot_seniority(speech_scores$transport_std, "Transport")
plot_seniority(speech_scores$economy_std, "Finance & Economy")
plot_seniority(speech_scores$trade_std,  "Trade")
plot_seniority(speech_scores$agriculture_std, "Agriculture")
plot_seniority(speech_scores$defence_std, "Defence")
dev.off()

# Years in parliament for men & women --------------------------------------------------------------------------------------------------------------

plot_seniority_levels <-
  function(y = speech_scores$children_std, ...) {
    speech_scores$y_tmp <- y
    
    tmp_mod <-
      lm_robust(
        y_tmp ~ gender * as.factor(round(years_in_parliament)) + factor(parliamentary_term) +
          party + age_years + attends_cabinet +
          attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
          is_committee_chair + marginality + occupation_class,
        data = speech_scores,
        clusters = person_id,
        se_type = "stata"
      )
    
    predict_data_function = function(years_in_parliament_tmp) {
      new_data = data.frame(
        gender = c("Male", "Female"),
        party = "Labour",
        age_years = round(mean(speech_scores$age_years)),
        attends_cabinet = FALSE,
        attends_shadow_cabinet = FALSE,
        gov_mp = FALSE,
        is_gov_minister = FALSE,
        is_opp_minister = FALSE,
        is_committee_chair = FALSE,
        occupation_class = "Political",
        parliamentary_term = "2005-2010",
        marginality = round(mean(speech_scores$marginality)),
        years_in_parliament = years_in_parliament_tmp
      )
      
      pred = predict(tmp_mod, newdata = new_data, interval = "confidence")
      
      data.frame(
        pred,
        years_in_parliament = new_data$years_in_parliament,
        party = new_data$party,
        gender = new_data$gender
      )
      
    }
    
    out = purrr::map_dfr(0:20, predict_data_function)
    
    plot(
      out$years_in_parliament[out$gender == "Male"],
      out$fit.fit[out$gender == "Male"],
      ylim = c(-0.5, 0.5),
      xlab = "Years in Parliament",
      pch = 20,
      cex.lab = 0.8,
      ylab = "Average topic use",
      axes = FALSE
    )
    segments(
      x0 = out$years_in_parliament[out$gender == "Male"],
      y0 = out$fit.lwr[out$gender == "Male"],
      y1 = out$fit.upr[out$gender == "Male"]
    )
    points(out$years_in_parliament[out$gender == "Female"], 
           out$fit.fit[out$gender == "Female"])
    segments(
      x0 = out$years_in_parliament[out$gender == "Female"],
      y0 = out$fit.lwr[out$gender == "Female"],
      y1 = out$fit.upr[out$gender == "Female"],
      lty = "dashed"
    )
    axis(
      1,
      at = seq(from = 0, to = 20, by = 1),
      las = 2,
      cex.axis = 1,
      line = 1
    )
    axis(2,
         at = seq(from = -0.5, to = 0.5, by = 0.25),
         cex.axis = 1)
    legend(
      "topright",
      legend = c("Man", "Woman"),
      pch = c(16, 1),
      lty = c("solid", "dashed"),
      bty = "n",
      cex = 0.8
    )
    title( ... , line = -0.7)
    
  }

pdf("analysis/plots/figure_2.pdf", 11.5, 7.5)
par(mfrow = c(3, 4), mai = c(0.35, 0.3, 0.1, 0.2))
plot_seniority_levels(speech_scores$children_std, "Children & Family")
plot_seniority_levels(speech_scores$health_std, "Health")
plot_seniority_levels(speech_scores$education_std, "Education")
plot_seniority_levels(speech_scores$social_std, "Welfare")
plot_seniority_levels(speech_scores$crime_std, "Crime & Policing")
plot_seniority_levels(speech_scores$environment_std, "Environment")
plot_seniority_levels(speech_scores$transport_std, "Transport")
plot_seniority_levels(speech_scores$economy_std,  "Finance & Economy")
plot_seniority_levels(speech_scores$trade_std, "Trade")
plot_seniority_levels(speech_scores$agriculture_std,"Agriculture")
plot_seniority_levels(speech_scores$defence_std, "Defence")
dev.off()

# Women representing women?

pdf("analysis/plots/figure_4.pdf", 6.7, 3.5)
par(mfrow = c(1, 2), mai = c(0.8, 0.40, 0.35, 0.4))
plot_seniority(speech_scores$women_dic_std, "")
plot_seniority_levels(speech_scores$women_dic_std, "")
dev.off()

# Gender gaps in talking about women within different topics

topic_types <- c("women_dic_std")

debate_types <- c("Defence", "Finance & Economy", "Agriculture", "Transport", "Health", "Education",
                  "Welfare", "Trade", "Environment", "Children & Family", "Crime & Policing")

covariate_formula <- "~ gender"

est_conf_ints <- function(x = bivariate_model){
  
  coefs <- coef(summary(x))
  est <- coefs[,1]
  se <- coefs[,2]
  hi <- est + 1.96 * se
  lo <- est - 1.96 * se
  return(data.frame(est, hi, lo))
}

coef_list <- list()
defence_list <- list()
economy_list <- list()
agriculture_list <- list()
transport_list <- list()
health_list <- list()
education_list <- list()
welfare_list <- list()
trade_list <- list()
environment_list <- list()
children_list <- list()
crime_list <- list()

i<-0

for(topic in topic_types){
  
  print(topic)
  i <- i+1
  
  defence_model <- lm(as.formula(paste0(topic, covariate_formula)),
                      data = speech_scores[speech_scores$defence_debate==TRUE])
  
  economy_model <- lm(as.formula(paste0(topic, covariate_formula)),
                      data = speech_scores[speech_scores$economy_debate==TRUE])
  
  agriculture_model <- lm(as.formula(paste0(topic, covariate_formula)),
                          data = speech_scores[speech_scores$agriculture_debate==TRUE])
  
  transport_model <- lm(as.formula(paste0(topic, covariate_formula)),
                        data = speech_scores[speech_scores$transport_debate==TRUE])
  
  health_model <- lm(as.formula(paste0(topic, covariate_formula)),
                     data = speech_scores[speech_scores$health_debate==TRUE])
  
  education_model <- lm(as.formula(paste0(topic, covariate_formula)),
                        data = speech_scores[speech_scores$education_debate==TRUE])
  
  welfare_model <- lm(as.formula(paste0(topic, covariate_formula)),
                      data = speech_scores[speech_scores$welfare_debate==TRUE])
  
  trade_model <- lm(as.formula(paste0(topic, covariate_formula)),
                    data = speech_scores[speech_scores$trade_debate==TRUE])
  
  environment_model <- lm(as.formula(paste0(topic, covariate_formula)),
                          data = speech_scores[speech_scores$environment_debate==TRUE])
  
  children_model <- lm(as.formula(paste0(topic, covariate_formula)),
                       data = speech_scores[speech_scores$children_debate==TRUE])
  
  crime_model <- lm(as.formula(paste0(topic, covariate_formula)),
                    data = speech_scores[speech_scores$crime_debate==TRUE])
  
  out <- list()
  out$defence <- est_conf_ints(defence_model)
  out$economy <- est_conf_ints(economy_model)
  out$agriculture <- est_conf_ints(agriculture_model)
  out$transport <- est_conf_ints(transport_model)
  out$health <- est_conf_ints(health_model)
  out$education <- est_conf_ints(education_model)
  out$welfare <- est_conf_ints(welfare_model)
  out$trade <- est_conf_ints(trade_model)
  out$environment <- est_conf_ints(environment_model)
  out$children <- est_conf_ints(children_model)
  out$crime <- est_conf_ints(crime_model)
  
  coef_list[[i]] <- out
  
}

defence_out <- lapply(1:length(coef_list), function(x) {
  tmp <- coef_list[[x]]$defence
  tmp$topic <- names(coef_list)[x]
  tmp[rownames(tmp) == "genderFemale",]
})

economy_out <- lapply(1:length(coef_list), function(x) {
  tmp <- coef_list[[x]]$economy
  tmp$topic <- names(coef_list)[x]
  tmp[rownames(tmp) == "genderFemale",]
})

agriculture_out <- lapply(1:length(coef_list), function(x) {
  tmp <- coef_list[[x]]$agriculture
  tmp$topic <- names(coef_list)[x]
  tmp[rownames(tmp) == "genderFemale",]
})

transport_out <- lapply(1:length(coef_list), function(x) {
  tmp <- coef_list[[x]]$transport
  tmp$topic <- names(coef_list)[x]
  tmp[rownames(tmp) == "genderFemale",]
})

health_out <- lapply(1:length(coef_list), function(x) {
  tmp <- coef_list[[x]]$health
  tmp$topic <- names(coef_list)[x]
  tmp[rownames(tmp) == "genderFemale",]
})

education_out <- lapply(1:length(coef_list), function(x) {
  tmp <- coef_list[[x]]$education
  tmp$topic <- names(coef_list)[x]
  tmp[rownames(tmp) == "genderFemale",]
})

welfare_out <- lapply(1:length(coef_list), function(x) {
  tmp <- coef_list[[x]]$welfare
  tmp$topic <- names(coef_list)[x]
  tmp[rownames(tmp) == "genderFemale",]
})

trade_out <- lapply(1:length(coef_list), function(x) {
  tmp <- coef_list[[x]]$trade
  tmp$topic <- names(coef_list)[x]
  tmp[rownames(tmp) == "genderFemale",]
})

environment_out <- lapply(1:length(coef_list), function(x) {
  tmp <- coef_list[[x]]$environment
  tmp$topic <- names(coef_list)[x]
  tmp[rownames(tmp) == "genderFemale",]
})

children_out <- lapply(1:length(coef_list), function(x) {
  tmp <- coef_list[[x]]$children
  tmp$topic <- names(coef_list)[x]
  tmp[rownames(tmp) == "genderFemale",]
})

crime_out <- lapply(1:length(coef_list), function(x) {
  tmp <- coef_list[[x]]$crime
  tmp$topic <- names(coef_list)[x]
  tmp[rownames(tmp) == "genderFemale",]
})

defence_out <- data.frame(do.call("rbind", defence_out))
economy_out <- data.frame(do.call("rbind", economy_out))
agriculture_out <- data.frame(do.call("rbind", agriculture_out))
transport_out <- data.frame(do.call("rbind", transport_out))
health_out <- data.frame(do.call("rbind", health_out))
education_out <- data.frame(do.call("rbind", education_out))
welfare_out <- data.frame(do.call("rbind", welfare_out))
trade_out <- data.frame(do.call("rbind", trade_out))
environment_out <- data.frame(do.call("rbind", environment_out))
children_out <- data.frame(do.call("rbind", children_out))
crime_out <- data.frame(do.call("rbind", crime_out))

defence_out$model <- "Defence"
economy_out$model <- "Finance & Economy"
agriculture_out$model <- "Agriculture"
transport_out$model <- "Transport"
health_out$model <- "Health"
education_out$model <- "Education"
welfare_out$model <-  "Welfare"
trade_out$model <- "Trade"
environment_out$model <-  "Environment"
children_out$model <- "Children & Family"
crime_out$model <- "Crime & Policing"

out <- rbind(defence_out, economy_out, agriculture_out, transport_out, health_out, education_out, welfare_out,
             trade_out, environment_out, children_out, crime_out)

out$model <- factor(out$model, levels = c("Defence", "Finance & Economy", "Agriculture",  "Transport", "Health", "Education", "Welfare",
                                          "Trade", "Environment", "Children & Family", "Crime & Policing"))

out$model <- factor(out$model, levels = out$model[order(out$est)])

pdf("analysis/plots/figure_5.pdf", 6,3)
ggplot(out, aes(x = est, xmin = lo, xmax = hi, y = model)) +
  geom_point(aes()) + geom_errorbarh(height = .001) +
  theme_bw() +
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 10)) +
  xlab("Woman MP difference") +
  ylab("") + geom_vline(xintercept = 0, linetype = 2)
dev.off()

# Fixed effect models --------------------------------------------------------------------------------------------------------------

topic_types <-
  c(
    "women_dic_std",
    "defence_std",
    "economy_std",
    "agriculture_std",
    "health_std",
    "education_std",
    "social_std",
    "trade_std",
    "environment_std",
    "crime_std",
    "transport_std",
    "children_std"
  )

## Women only

speech_scores_women <-
  speech_scores[speech_scores$gender == "Female", ]

est_conf_ints_fes <- function(x = model_3) {
  coefs <- coef(summary(x))
  est <- coef(x)
  se <- sqrt(diag(vcovCR(x, cluster = "individual", type = "CR0")))
  hi <- est + 1.96 * se
  lo <- est - 1.96 * se
  return(data.frame(est, hi, lo))
  
}

coef_list_women <- list()
model_3_list <- list()
i <- 0
topic <- "women_dic_std"
for (topic in topic_types) {
  print(topic)
  i <- i + 1
  
  model_3 <-
    plm(
      as.formula(
        paste0(topic, "~ years_in_parliament + factor(parliamentary_term)")
      ),
      data = speech_scores_women,
      index = "person_id",
      model = "within",
      effect = "individual"
    )
  
  out <- list()
  out$model_3 <- est_conf_ints_fes(model_3)
  
  coef_list_women[[i]] <- out
  
}

## Men only

speech_scores_men <- speech_scores[speech_scores$gender == "Male", ]

coef_list_men <- list()
model_3_list <- list()
i <- 0
topic <- "women_dic_std"
for (topic in topic_types) {
  print(topic)
  i <- i + 1
  
  model_3 <-
    plm(
      as.formula(
        paste0(topic, "~ years_in_parliament + factor(parliamentary_term)")
      ),
      data = speech_scores_men,
      index = "person_id",
      model = "within",
      effect = "individual"
    )
  
  out <- list()
  out$model_3 <- est_conf_ints_fes(model_3)
  
  coef_list_men[[i]] <- out
  
}

save(coef_list_men, coef_list_women,
     file = "analysis/models/fes_time_fes.Rdata")

load("analysis/models/fes_time_fes.Rdata")

names(coef_list_women) <- topic_types

model_3_women_out <- lapply(1:length(coef_list_women), function(x) {
  tmp <- coef_list_women[[x]]$model_3
  tmp$topic <- names(coef_list_women)[x]
  tmp[rownames(tmp) == "years_in_parliament", ]
})

model_3_women_out <- data.frame(do.call("rbind", model_3_women_out))
model_3_women_out$model <- "Fixed Effect"
out_women <- model_3_women_out

out_women$topic[out_women$topic == "women_dic_std"] <- "Women"
out_women$topic[out_women$topic == "defence_std"] <- "Defence"
out_women$topic[out_women$topic == "economy_std"] <-"Finance & Economy"
out_women$topic[out_women$topic == "agriculture_std"] <- "Agriculture"
out_women$topic[out_women$topic == "health_std"] <- "Health"
out_women$topic[out_women$topic == "education_std"] <- "Education"
out_women$topic[out_women$topic == "social_std"] <- "Welfare"
out_women$topic[out_women$topic == "trade_std"] <- "Trade"
out_women$topic[out_women$topic == "environment_std"] <- "Environment"
out_women$topic[out_women$topic == "crime_std"] <-"Crime & Policing"
out_women$topic[out_women$topic == "transport_std"] <- "Transport"
out_women$topic[out_women$topic == "children_std"] <- "Children & Family"
out_women$topic <- factor(out_women$topic, 
                          levels = out_women$topic[out_women$model == "Fixed Effect"][order(out_women$est[out_women$model == "Fixed Effect"])])

names(coef_list_men) <- topic_types

model_3_men_out <- lapply(1:length(coef_list_men), function(x) {
  tmp <- coef_list_men[[x]]$model_3
  tmp$topic <- names(coef_list_men)[x]
  tmp[rownames(tmp) == "years_in_parliament", ]
})

model_3_men_out <- data.frame(do.call("rbind", model_3_men_out))
model_3_men_out$model <- "Fixed Effect"
out_men <- model_3_men_out

out_men$topic[out_men$topic == "women_dic_std"] <- "Women"
out_men$topic[out_men$topic == "defence_std"] <- "Defence"
out_men$topic[out_men$topic == "economy_std"] <- "Finance & Economy"
out_men$topic[out_men$topic == "agriculture_std"] <- "Agriculture"
out_men$topic[out_men$topic == "health_std"] <- "Health"
out_men$topic[out_men$topic == "education_std"] <- "Education"
out_men$topic[out_men$topic == "social_std"] <- "Welfare"
out_men$topic[out_men$topic == "trade_std"] <- "Trade"
out_men$topic[out_men$topic == "environment_std"] <- "Environment"
out_men$topic[out_men$topic == "crime_std"] <- "Crime & Policing"
out_men$topic[out_men$topic == "transport_std"] <- "Transport"
out_men$topic[out_men$topic == "children_std"] <-"Children & Family"
out_men$topic <- factor(out_men$topic, levels = out_men$topic[out_women$model == "Fixed Effect"][order(out_women$est[out_women$model == "Fixed Effect"])])

out_men <- out_men[out_men$model == "Fixed Effect", ]
out_men$gender <- c("Men")
out_women <- out_women[out_women$model == "Fixed Effect", ]
out_women$gender <- c("Women")
combined <- rbind(out_men, out_women)
combined$topic <- factor(combined$topic, 
                         levels = combined$topic[order(out_women$est - out_men$est, decreasing = T)])

pdf("analysis/plots/figure_6.pdf", 6.5, 3.5)
ggplot(combined, aes(
  x = est,
  xmin = lo,
  xmax = hi,
  y = topic,
  col = gender)) +
  geom_point(position = position_dodge(width = 0.3), 
             aes(shape = gender, color = gender)) +
  geom_errorbarh(height = .001, position = position_dodge(width = 0.3)) +
  theme_bw() +
  scale_color_manual(values = c("black", "grey40")) +
  labs(color = "MP gender", shape = "MP gender") +
  scale_shape_manual(values = c(19, 1)) +
  theme(
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 10),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 10)) +
  xlab("Years in Parliament") +
  xlim(-0.05, 0.02) +
  ylab("") + geom_vline(xintercept = 0, linetype = 2)
dev.off()
